#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import matplotlib.animation as animation
from scipy.integrate import odeint, ode, quad, trapz
from scipy import optimize
from scipy.spatial import distance
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 and removed in
# later releases, where the same sheet is shipped as 'seaborn-v0_8'. Pick
# whichever name this Matplotlib installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
from PIL import Image
from scipy.stats import entropy
from IPython.display import HTML
from tqdm import tqdm
import math
from algorithms import *
import scipy.sparse as sparse
import scipy.stats as stats

# nbconvert emits this call for the `%matplotlib inline` cell magic.
# `get_ipython` only exists inside an IPython/Jupyter session, so guard the
# call to keep this exported script runnable with a plain `python` interpreter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Not running under IPython: there is no inline backend to select.
    pass
import warnings
# Suppress every warning for the rest of the session (keeps cell output clean).
warnings.filterwarnings('ignore')


# We first test out OGDA for simple two player zero sum games (a simple example is Matching Pennies), before extending to the multiplayer case.

# In[2]:


# Payoff matrices of the two 2x2 zero-sum games used below. G1 has the
# Matching Pennies sign pattern; G2 is the same game with doubled payoffs.
G1 = np.array([[1, -1],
               [-1, 1]])
G2 = 2 * G1


# In[3]:


# Starting strategies for the two-player run. Each array holds two nearby
# probability vectors (rows) -- presumably the two initial iterates required
# by the update rule; confirm against runGDA2Player.
# y_1 is x_1 with its two coordinates swapped, stored as an independent copy.
x_1 = np.array([[0.4, 0.6],
                [0.41, 0.59]])
y_1 = x_1[:, ::-1].copy()


# In[4]:


# Run the two-player dynamics on G1 starting from (x_1, y_1).
# The positional arguments 5000 and 0.1 are presumably the number of steps and
# the step size eta -- confirm against runGDA2Player in algorithms.py.
data1 = runGDA2Player(G1,x_1,y_1, 5000, 0.1)
# Plot column 0 of data1['x'] against column 0 of data1['y']
# (the trailing semicolon suppresses the notebook's text output).
plt.plot(data1['x'][:,0], data1['y'][:,0]);


# In[5]:


# Reference equilibrium used by the distance plots: the uniform mixed
# strategy, which is the Nash equilibrium of Matching Pennies.
nash1 = [0.5, 0.5]


# In[6]:


# Log of the Euclidean distance between every entry of data1['x'] and nash1.
# The 1e-6 offset keeps the logarithm finite when the distance is exactly zero.
dist = np.log([1e-6 + np.linalg.norm(point - nash1) for point in data1['x']])
# Only the first 1000 entries are shown.
plt.plot(dist[:1000]);


# In[7]:


# Interaction graph for the 4-player game: the path 0 - 1 - 2 - 3 written as
# an antisymmetric integer matrix (+1 on the superdiagonal, -1 on the
# subdiagonal, 0 elsewhere):
#     [[ 0,  1,  0,  0],
#      [-1,  0,  1,  0],
#      [ 0, -1,  0,  1],
#      [ 0,  0, -1,  0]]
# The sign presumably tells runGDANPlayer which side of an edge game each
# endpoint plays -- confirm in algorithms.py.
graph1 = np.diag(np.ones(3, dtype=int), k=1) - np.diag(np.ones(3, dtype=int), k=-1)


# In[8]:


# Games passed to runGDANPlayer for the 4-player run: G1 and G2 alternating,
# i.e. [G1, G2, G1, G2]. The entries are references to the same two arrays.
G_list1 = [G1, G2] * 2


# In[9]:


# Starting strategies for the four players, shape (4, 2, 2): one row of the
# literal per player, each holding two nearby probability vectors (the second
# differs from the first by 0.01 in each coordinate).
vals1 = np.array([
    [[0.4, 0.6], [0.41, 0.59]],
    [[0.6, 0.4], [0.59, 0.41]],
    [[0.3, 0.7], [0.31, 0.69]],
    [[0.8, 0.2], [0.79, 0.21]],
])


# In[10]:


# 4-player run on graph1 with the alternating G1/G2 games, 2000 steps,
# step size 0.1, explicitly requesting the optimistic variant.
data2 = runGDANPlayer(G_list1, vals1, numsteps=2000, eta=0.1, graph=graph1, N=4, optimistic=True)


# In[11]:


# Plot the entry at index 1 of data2['vals'] (presumably the strategy
# trajectory of player 1 -- confirm the layout returned by runGDANPlayer).
plt.plot(data2['vals'][1]);


# In[12]:


# Distance diagnostics for the 4 players, measured against nash1 for each.
dist_4player_MP = PlotDist(data2['vals'], 4, nash=[nash1, nash1, nash1, nash1])


# In[13]:


# Time-average plot for player 3; the positional 2 is presumably the strategy
# dimension (other calls in this file pass it as `dimension=`).
PlotTimeAvg(data2, 2, player=3);


# In[14]:


# Random starting strategies for 20 players: draw one random 2-vector per
# player, rescale it so its coordinates sum to 1, then duplicate it along
# axis 1 so every player has two identical entries -> shape (20, 2, 2).
vals2 = np.random.rand(20, 1, 2)
vals2 = vals2 / vals2.sum(axis=(1, 2), keepdims=True)
vals2 = np.repeat(vals2, 2, axis=1)


# In[15]:


# Interaction graph for the 20-player game: the path 0 - 1 - ... - 19 as an
# antisymmetric float matrix (+1 on the superdiagonal, -1 on the subdiagonal).
graph2 = np.eye(20, k=1) - np.eye(20, k=-1)


# In[16]:


# Games passed to runGDANPlayer for the 20-player run: G1 and G2 alternating,
# 20 entries in total. The entries are references to the same two arrays.
G_list2 = [G1, G2] * 10


# In[17]:


# 20-player run on graph2 for 20000 steps with step size 0.1.
# NOTE(review): unlike the 4-player call, `optimistic` is not passed here, so
# this relies on runGDANPlayer's default -- confirm that is the intended variant.
data3 = runGDANPlayer(G_list2, vals2, numsteps=20000, eta=0.1, graph=graph2, N=20)


# In[18]:


# Distance diagnostics for the 20 players, measured against nash1 for each.
dist_20player_MP = PlotDist(data3['vals'], 20, nash=[nash1 for i in range(20)])


# In[19]:


# Time-average plot for player 0; the positional 2 is presumably the strategy
# dimension (other calls in this file pass it as `dimension=`).
PlotTimeAvg(data3, 2, player=0);


# ## Experiments on Random Sequence Form Games

# ### Fully connected graph with 3 nodes

# In[20]:


# Fully connected interaction graph on 3 nodes as an antisymmetric integer
# matrix: +1 strictly above the diagonal, -1 strictly below it.
#     [[ 0,  1,  1],
#      [-1,  0,  1],
#      [-1, -1,  0]]
graph3 = np.triu(np.ones((3, 3), dtype=int), k=1) - np.tril(np.ones((3, 3), dtype=int), k=-1)


# We run the following function for each seed to find the best $\eta$ value.

# In[21]:


# Step-size search for the 3-player random game generated with seed 6
# (num_etas=10 is presumably the number of candidate values tried -- see
# FindBestEta in algorithms.py). Re-run with a different `seed` for each game.
test_eta1 = FindBestEta(num_etas=10, dimension=3, graph=graph3, players=3, numsteps=2000, seed=6)


# In[22]:


# Five random 3-player games with fixed seeds; `eta` and `seeds` both have five
# entries, presumably paired by position (eta[i] tuned for seeds[i]) -- confirm
# in RandomGameSimulation.
rand_data_3players = RandomGameSimulation(num_simulations=5, dimension=3, graph=graph3, players=3, numsteps=1000, eta=[0.2,0.3,0.9,0.24,0.7], rand=False, seeds=[0,1,3,5,6])


# In[23]:


# Time-average plot for the first simulation in the list (index 0).
PlotTimeAvg(rand_data_3players[0], 3);


# ### Fully connected graph with 4 nodes

# In[24]:


# Fully connected interaction graph on 4 nodes as an antisymmetric integer
# matrix: +1 strictly above the diagonal, -1 strictly below it.
#     [[ 0,  1,  1,  1],
#      [-1,  0,  1,  1],
#      [-1, -1,  0,  1],
#      [-1, -1, -1,  0]]
graph4 = np.triu(np.ones((4, 4), dtype=int), k=1) - np.tril(np.ones((4, 4), dtype=int), k=-1)


# We run the following function for each seed to find the best $\eta$ value.

# In[25]:


# Step-size search for the 4-player random game generated with seed 3, with
# the search restricted by endpoints=(0.0001, 1.5). Re-run per seed.
test_eta2 = FindBestEta(num_etas=20, dimension=3, graph=graph4, players=4, numsteps=5000, seed=3, endpoints=(0.0001, 1.5))


# In[26]:


# Five random 4-player games with fixed seeds; `eta` and `seeds` both have five
# entries, presumably paired by position -- confirm in RandomGameSimulation.
# NOTE(review): eta=1.685 lies outside the endpoints=(0.0001, 1.5) used by the
# FindBestEta call in this file; check how that value was obtained.
rand_data_4players = RandomGameSimulation(num_simulations=5, dimension=3, graph=graph4, players=4, numsteps=10000, eta=[0.19,1.184,0.106,1.685,0.369], rand=False, seeds=[0,3,7,8,14])


# In[27]:


# Time-average plot for player 3 of the first simulation, passing
# num_to_plot=2000 to limit what is drawn.
PlotTimeAvg(data=rand_data_4players[0], dimension=3, player=3, num_to_plot=2000);


# ## Kuhn Poker - Simplified version of poker, where the deck only has 3 cards: J, Q and K.

# ### Rules 
# - Each player antes 1.
# - Each player is dealt one of the three cards, and the third is put aside unseen.
# - Player 1 can check or bet 1.
# - If P1 checks, P2 can check or bet.
# - If P2 checks there is a showdown for the pot of 2 (i.e. the higher card wins 1 from the other player).
# - If P2 bets then P1 can fold or call.
# - If P1 folds then P2 takes the pot of 3 (i.e. winning 1 from P1).
# - If P1 calls there is a showdown for the pot of 4 (i.e. the higher card wins 2 from the other player).
# - If P1 bets then P2 can fold or call.
# - If P2 folds then P1 takes the pot of 3 (i.e. winning 1 from P2).
# - If P2 calls there is a showdown for the pot of 4 (i.e. the higher card wins 2 from the other player).
# 
# First we convert to sequence form. This results in the following game:
# 

# In[28]:


# Sequence-form payoff matrix of Kuhn poker (13 x 13, index 0 = empty sequence).
# Every non-zero entry is (chips won by player 1) * 1/6, where 1/6 is the
# probability of each of the six ordered deals: +-1/6 for one-chip outcomes
# (check/check showdown or a fold) and +-1/3 for showdowns after a called bet
# (two chips).
# Layout inferred from the sign pattern -- NOTE(review): confirm:
#   rows 1/3/5 = P1 checks with J/Q/K, rows 2/4/6 = P1 bets with J/Q/K,
#   rows 7-8, 9-10, 11-12 = P1 folds/calls after checking with J, Q, K;
#   column blocks 1-4, 5-8, 9-12 = P2 holding Q, K, J, each ordered as
#   (check, bet) after a check followed by (fold, call) after a bet.
# Fix: entries [10][10] and [12][10] (P1 calling with Q resp. K a bet made by
# P2 holding J) used to be 2/3. By the rules a called bet is worth two chips,
# i.e. 2 * 1/6 = 1/3, exactly like the mirrored entries [12][2] and -[8][2].
G3 = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
               [0, -1/6, 0, 0, 0, -1/6, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 1/6, -1/3, 0, 0, 1/6, -1/3, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, -1/6, 0, 0, 0, 1/6, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 1/6, -1/3, 0, 0, 1/6, 1/3],
               [0, 1/6, 0, 0, 0, 0, 0, 0, 0, 1/6, 0, 0, 0],
               [0, 0, 0, 1/6, 1/3, 0, 0, 0, 0, 0, 0, 1/6, 1/3],
               [0, 0, -1/6, 0, 0, 0, -1/6, 0, 0, 0, 0, 0, 0],
               [0, 0, -1/3, 0, 0, 0, -1/3, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, -1/6, 0, 0, 0, -1/6, 0, 0],
               [0, 0, 0, 0, 0, 0, -1/3, 0, 0, 0, 1/3, 0, 0],
               [0, 0, -1/6, 0, 0, 0, 0, 0, 0, 0, -1/6, 0, 0],
               [0, 0, 1/3, 0, 0, 0, 0, 0, 0, 0, 1/3, 0, 0]])


# We run experiments on Kuhn poker games of up to 5 players. Empirically, we found that having too many players results in numerical errors due to the large number of matrix multiplications being performed at every iteration. An interesting future direction is to run truly large scale simulations on higher dimensional extensive form games.

# In[29]:


# Random starting point for the poker runs: 13 uniform draws (one coordinate
# per row/column of the 13x13 game matrix), rescaled to sum to 1.
# NOTE(review): a vector that merely sums to 1 is not in general a valid
# sequence-form strategy; presumably runGDANPlayer projects it onto the
# feasible set -- confirm.
x = np.random.rand(13)
x_norm = x / x.sum()


# In[30]:


# Interaction graph for 4-player poker: the 4-cycle with edges
# 0-1, 1-2, 2-3 and 0-3, as an antisymmetric integer matrix (+1 at
# [low, high], -1 at [high, low]):
#     [[ 0,  1,  0,  1],
#      [-1,  0,  1,  0],
#      [ 0, -1,  0,  1],
#      [-1,  0, -1,  0]]
# NOTE(review): this rebinds `graph4`, which earlier in the file names the
# fully connected 4-node graph; re-running those earlier cells after this one
# would silently use the cycle instead.
graph4 = np.zeros((4, 4), dtype=int)
graph4[[0, 1, 2, 0], [1, 2, 3, 3]] = 1
graph4 = graph4 - graph4.T


# In[31]:


# Starting strategies for the poker runs: every player gets the pair
# [x_norm, x_norm], so all players start from the same point.
# NOTE(review): all entries share one shape, so a plain float array would hold
# the same data; dtype=object is kept to preserve the existing behaviour.
vals_4player = np.array([[x_norm, x_norm]] * 4, dtype=object)
vals_5player = np.array([[x_norm, x_norm]] * 5, dtype=object)


# In[32]:


# Per-run game lists for Kuhn poker: every entry is a reference to the same
# payoff matrix G3, repeated once per player.
game_list_4players = [G3] * 4
game_list_5players = [G3] * 5
game_list_10players = [G3] * 10


# In[33]:


# 4-player Kuhn poker run: G3 for every list entry, the current `graph4`
# (the 4-cycle at this point in the script), 50000 steps, step size 0.33.
# `optimistic` is not passed, so runGDANPlayer's default applies.
data_poker_4player = runGDANPlayer(game_list_4players, vals_4player, numsteps=50000, eta=0.33, graph=graph4, N=4)


# In[34]:


# Distance diagnostics for the 4 players. The third positional argument is the
# run's own 'timeavg' entry, passed in the slot that other calls in this file
# fill with `nash=` -- presumably distances are measured to the time-averaged
# strategies here; confirm against PlotDist.
dist_poker_4player = PlotDist(data_poker_4player['vals'], 4, data_poker_4player['timeavg'])


# In[35]:


# Time-average plot for player 3 of the 4-player Kuhn poker run.
# The second argument is the strategy dimension (passed as `dimension=`
# elsewhere in this file), not the number of players: poker strategies here
# have 13 coordinates (G3 is 13x13), as in the 5-player plot. The previous
# value 4 was the player count.
PlotTimeAvg(data_poker_4player, 13, player=3);


# In[36]:


# Interaction graph for 5-player poker: the 5-cycle with edges
# 0-1, 1-2, 2-3, 3-4 and 0-4, as an antisymmetric integer matrix (+1 at
# [low, high], -1 at [high, low]):
#     [[ 0,  1,  0,  0,  1],
#      [-1,  0,  1,  0,  0],
#      [ 0, -1,  0,  1,  0],
#      [ 0,  0, -1,  0,  1],
#      [-1,  0,  0, -1,  0]]
graph5 = np.zeros((5, 5), dtype=int)
graph5[[0, 1, 2, 3, 0], [1, 2, 3, 4, 4]] = 1
graph5 = graph5 - graph5.T


# In[37]:


# 5-player Kuhn poker run: G3 for every list entry, graph5, 100000 steps,
# step size 0.32. `optimistic` is not passed, so runGDANPlayer's default applies.
data_poker_5player = runGDANPlayer(game_list_5players, vals_5player, numsteps=100000, eta=0.32, graph=graph5, N=5)


# In[38]:


# Distance diagnostics for the 5 players; as in the 4-player poker cell, the
# run's own 'timeavg' entry is passed as the third argument (presumably the
# reference point distances are measured to -- confirm against PlotDist).
dist_poker_5player = PlotDist(data_poker_5player['vals'], 5, data_poker_5player['timeavg'])


# In[39]:


# Time-average plot for player 3 with the positional 13 matching the 13x13
# game matrix, passing num_to_plot=3000 to limit what is drawn.
PlotTimeAvg(data_poker_5player, 13, player=3, num_to_plot=3000);


# In[ ]:





# In[ ]:




